#load packages
library(RADstackshelpR)
library(gridExtra)
library(ggplot2)
##Step 1: Demultiplex each sample using ‘process_radtags’. ##Step 2: Use fastqcR to remove samples that didn’t generate a meaningful amount of sequence data. ##Step 3: Iterate over values of m ranging from 3-7, while leaving all other parameters at default values. ##Step 4: Visualize the output of these 5 runs and determine the optimal value for m.
#optimize_m function will generate summary stats on your 5 iterative runs
#input can be full path to each file or just the file name if your working directory contains the files
m.out<-optimize_m(m3="/Users/devder/Desktop/hipposideros/m3.vcf",
m4="/Users/devder/Desktop/hipposideros/m4.vcf",
m5="/Users/devder/Desktop/hipposideros/m5.vcf",
m6="/Users/devder/Desktop/hipposideros/m6.vcf",
m7="/Users/devder/Desktop/hipposideros/m7.vcf")
#visualize the effect of varying m on the depth of each sample
vis_depth(output = m.out)
## [1] "Visualize how different values of m affect average depth in each sample"
## Picking joint bandwidth of 8.84
#visualize the effect of varying m on the number of SNPs retained
vis_snps(output = m.out, stacks_param = "m")
## Visualize how different values of m affect number of SNPs retained.
## Density plot shows the distribution of the number of SNPs retained in each sample,
## while the asterisk denotes the total number of SNPs retained at an 80% completeness cutoff.
## Picking joint bandwidth of 5870
#visualize the effect of varying m on the number of loci retained
vis_loci(output = m.out, stacks_param = "m")
## Visualize how different values of m affect number of polymorphic loci retained.
## Density plot shows the distribution of the number of loci retained in each sample,
## while the asterisk denotes the total number of loci retained at an 80% completeness cutoff. The optimal value is denoted by red color.
## Picking joint bandwidth of 2960
#3 is the optimal m value, and will be used next to optimize M
##Step 5: Iterate over values of M ranging from 1-8, setting m to the optimal value (here 3). ##Step 6: Visualize the output of these 8 runs and determine the optimal value for M.
#optimize M
M.out<-optimize_bigM(M1="/Users/devder/Desktop/hipposideros/bigM1.vcf",
M2="/Users/devder/Desktop/hipposideros/bigM2.vcf",
M3="/Users/devder/Desktop/hipposideros/bigM3.vcf",
M4="/Users/devder/Desktop/hipposideros/bigM4.vcf",
M5="/Users/devder/Desktop/hipposideros/bigM5.vcf",
M6="/Users/devder/Desktop/hipposideros/bigM6.vcf",
M7="/Users/devder/Desktop/hipposideros/bigM7.vcf",
M8="/Users/devder/Desktop/hipposideros/bigM8.vcf")
#visualize the effect of varying M on the number of SNPs retained
vis_snps(output = M.out, stacks_param = "M")
## Visualize how different values of M affect number of SNPs retained.
## Density plot shows the distribution of the number of SNPs retained in each sample,
## while the asterisk denotes the total number of SNPs retained at an 80% completeness cutoff.
## Picking joint bandwidth of 4990
#visualize the effect of varying M on the number of polymorphic loci retained
vis_loci(output = M.out, stacks_param = "M")
## Visualize how different values of M affect number of polymorphic loci retained.
## Density plot shows the distribution of the number of loci retained in each sample,
## while the asterisk denotes the total number of loci retained at an 80% completeness cutoff. The optimal value is denoted by red color.
## Picking joint bandwidth of 2490
##Step 7: Iterate over values of n ranging from M-1, M, M+1, setting m and M to the optimal values (here 3 and 2, respectively). ##Step 8: Visualize the output of these 3 runs and determine the optimal value for n.
#optimize n
n.out<-optimize_n(nequalsMminus1="/Users/devder/Desktop/hipposideros/n1.vcf",
nequalsM="/Users/devder/Desktop/hipposideros/n2.vcf",
nequalsMplus1="/Users/devder/Desktop/hipposideros/n3.vcf")
#visualize the effect of varying n on the number of SNPs retained
vis_snps(output = n.out, stacks_param = "n")
## Visualize how different values of n affect number of SNPs retained.
## Density plot shows the distribution of the number of SNPs retained in each sample,
## while the asterisk denotes the total number of SNPs retained at an 80% completeness cutoff.
## Picking joint bandwidth of 5650
#visualize the effect of varying n on the number of polymorphic loci retained
vis_loci(output = n.out, stacks_param = "n")
## Visualize how different values of n affect number of polymorphic loci retained.
## Density plot shows the distribution of the number of loci retained in each sample,
## while the asterisk denotes the total number of loci retained at an 80% completeness cutoff. The optimal value is denoted by red color.
## Picking joint bandwidth of 2700
#Finally, make a single figure showing the optimization process of all three parameters
gl<-list()
gl[[1]]<-vis_depth(output = m.out)
## [1] "Visualize how different values of m affect average depth in each sample"
gl[[2]]<-vis_snps(output = m.out, stacks_param = "m")
## Visualize how different values of m affect number of SNPs retained.
## Density plot shows the distribution of the number of SNPs retained in each sample,
## while the asterisk denotes the total number of SNPs retained at an 80% completeness cutoff.
gl[[3]]<-vis_loci(output = m.out, stacks_param = "m")
## Visualize how different values of m affect number of polymorphic loci retained.
## Density plot shows the distribution of the number of loci retained in each sample,
## while the asterisk denotes the total number of loci retained at an 80% completeness cutoff. The optimal value is denoted by red color.
gl[[4]]<-vis_snps(output = M.out, stacks_param = "M")
## Visualize how different values of M affect number of SNPs retained.
## Density plot shows the distribution of the number of SNPs retained in each sample,
## while the asterisk denotes the total number of SNPs retained at an 80% completeness cutoff.
gl[[5]]<-vis_loci(output = M.out, stacks_param = "M")
## Visualize how different values of M affect number of polymorphic loci retained.
## Density plot shows the distribution of the number of loci retained in each sample,
## while the asterisk denotes the total number of loci retained at an 80% completeness cutoff. The optimal value is denoted by red color.
gl[[6]]<-vis_snps(output = n.out, stacks_param = "n")
## Visualize how different values of n affect number of SNPs retained.
## Density plot shows the distribution of the number of SNPs retained in each sample,
## while the asterisk denotes the total number of SNPs retained at an 80% completeness cutoff.
gl[[7]]<-vis_loci(output = n.out, stacks_param = "n")
## Visualize how different values of n affect number of polymorphic loci retained.
## Density plot shows the distribution of the number of loci retained in each sample,
## while the asterisk denotes the total number of loci retained at an 80% completeness cutoff. The optimal value is denoted by red color.
#visualize as a single plot
grid.arrange(
grobs = gl,
widths = c(1,1,1,1,1,1),
layout_matrix = rbind(c(1,1,2,2,3,3),
c(4,4,4,5,5,5),
c(6,6,6,7,7,7))
)
## Picking joint bandwidth of 8.84
## Picking joint bandwidth of 5870
## Picking joint bandwidth of 2960
## Picking joint bandwidth of 4990
## Picking joint bandwidth of 2490
## Picking joint bandwidth of 5650
## Picking joint bandwidth of 2700
#save plot
g<-arrangeGrob(grobs = gl, widths = c(1,1,1,1,1,1),
layout_matrix = rbind(c(1,1,2,2,3,3),
c(4,4,4,5,5,5),
c(6,6,6,7,7,7)))
## Picking joint bandwidth of 8.84
## Picking joint bandwidth of 5870
## Picking joint bandwidth of 2960
## Picking joint bandwidth of 4990
## Picking joint bandwidth of 2490
## Picking joint bandwidth of 5650
## Picking joint bandwidth of 2700
ggsave(file="~/Desktop/hipposideros/denovo.optimization.png", g, width = 11, height=9, units = "in")